import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import pandas_profiling
import random
import plotly.io as pio
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objs as go
from plotly import tools, subplots
import plotly
# Switch plotly into offline notebook mode (presumably so figures render
# inline in the notebook this script was exported from).
plotly.offline.init_notebook_mode()
# Make "plotly_dark" the default template for every figure created below.
pio.templates.default = "plotly_dark"
# Load the semicolon-delimited case records and normalise column types.
data = pd.read_csv(
    '/home/prodigalson/J Files/Project - Time Series/data.csv',
    index_col=None,
    delimiter=";",
)
# DOR is parsed into datetimes so it can be formatted per month/year later.
data['DOR'] = pd.to_datetime(data['DOR'])
# Ages are rounded and stored as nullable integers; missing ages become 0.
data['Age'] = data['Age'].round().astype('Int64')
data['Age'] = data['Age'].fillna(0)
# Notebook previews: the first rows as a table, then the profiling report.
ff.create_table(data.head())
data.profile_report()
# Cases per month ('YYYY-MM' keys, chronological): male, female and overall.
data['MnthWse'] = data['DOR'].map(lambda dor: dor.strftime('%Y-%m'))
mCnt = data.loc[data['Sex'] == 'M', 'MnthWse'].value_counts().sort_index()
fCnt = data.loc[data['Sex'] == 'F', 'MnthWse'].value_counts().sort_index()
totl = data['MnthWse'].value_counts().sort_index()

# Cases per year ('YYYY' keys, chronological): male, female and overall.
data['YrWse'] = data['DOR'].map(lambda dor: dor.strftime('%Y'))
ymCnt = data.loc[data['Sex'] == 'M', 'YrWse'].value_counts().sort_index()
yfCnt = data.loc[data['Sex'] == 'F', 'YrWse'].value_counts().sort_index()
ytotl = data['YrWse'].value_counts().sort_index()
# Notebook preview of the yearly male counts.
ymCnt
# Line chart of monthly reported cases: one trace each for male, female, total.
fig = go.Figure()
for trace_name, monthly in (('Male', mCnt), ('Female', fCnt), ('Total', totl)):
    fig.add_trace(
        go.Scatter(x=monthly.index, y=monthly, mode='lines', name=trace_name)
    )

fig.update_layout(
    title=dict(
        text="Trivandrum - Reported Cases Over Time",
        y=0.95,
        x=0.05,
        xanchor='left',
        yanchor='top',
    ),
    xaxis_title="Year",
    yaxis_title="Number of Patients",
    # Only the year part of each 'YYYY-MM' key is shown on the axis ticks.
    xaxis_tickformat='%Y',
    font={
        'family': "Courier New, monospace",
        'size': 15,
        'color': "#7f7f7f",
    },
)
# A range slider under the x axis lets the reader zoom into a period.
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Month x year summary table: rows are months ('01'..'12'), columns are years.
sumry = (
    data['MnthWse']
    .value_counts()
    .sort_index()
    .rename_axis('MnthWse')
    .reset_index(name='Cnt')
)
# Re-parse the 'YYYY-MM' keys so month and year can be split out separately.
sumry['MnthWse'] = pd.to_datetime(sumry['MnthWse'], format='%Y-%m')
sumry['Mnth'] = sumry['MnthWse'].dt.strftime('%m')
sumry['Year'] = sumry['MnthWse'].dt.strftime('%Y')
dataSumry = pd.pivot_table(sumry, index="Mnth", columns="Year", values="Cnt")
# Notebook previews: the year columns, then the pivot rendered as a table.
dataSumry.columns
ff.create_table(dataSumry.reset_index())
# Stacked bar chart of yearly cases split by gender, with the yearly total
# written above each stack.
# Each trace takes its x values from its own index. Previously both traces
# used dataSumry.columns, which pairs x and y purely by position: a year with
# no cases for one gender would shift that gender's bars onto the wrong year.
fig = go.Figure(data=[
    go.Bar(name='Female',
           x=yfCnt.index,
           y=yfCnt,
           marker_color='DarkOrange'),
    go.Bar(name='Male',
           x=ymCnt.index,
           y=ymCnt,
           marker_color='dodgerblue'),
])
# One annotation per year, placed 5% above the top of the stack.
total_labels = [
    {"x": year, "y": total * 1.05, "text": str(total), "showarrow": False}
    for year, total in ytotl.items()
]
fig.update_layout(
    barmode='stack',
    annotations=total_labels,
    title={
        'text': "Cases in each Year (Gender Wise)",
        'y': 0.95,
        'x': 0.03,
        'xanchor': 'left',
        'yanchor': 'top'},
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f"
    ))
fig.show()
# One line per year, plotted against the month rows of the summary pivot.
fig = go.Figure()
for year_label, monthly_counts in dataSumry.items():
    fig.add_trace(
        go.Scatter(
            x=dataSumry.index,
            y=monthly_counts,
            mode='lines',
            name=year_label,
        )
    )

fig.update_layout(
    title=dict(
        text="Trivandrum - Reported Cases Over Time (Year wise)",
        y=0.9,
        x=0.05,
        xanchor='left',
        yanchor='top',
    ),
    yaxis_title="Number of Patients",
    # Month numbers on the x axis are relabelled with month abbreviations.
    xaxis={
        'tickmode': 'array',
        'tickvals': list(range(1, 13)),
        'ticktext': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    },
    font={
        'family': "Courier New, monospace",
        'size': 15,
        'color': "#7f7f7f",
    },
)
fig.show()
# Box plot of the case counts per calendar month (one box per month, built
# from that month's values across all years in dataSumry).
N = 12
mnth = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# endpoint=False keeps the 12 hues distinct: hue 0 and hue 360 are the same
# colour, so including the endpoint gave Jan and Dec identical boxes.
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0, 360, N, endpoint=False)]
# Rows are looked up by their month key ('01'..'12') rather than by position,
# so a month with no data cannot mislabel the remaining months or raise an
# IndexError.
fig = go.Figure(data=[go.Box(
    y=month_row,
    marker_color=c[int(month_key) - 1],
    name=mnth[int(month_key) - 1],
) for month_key, month_row in dataSumry.iterrows()])
fig.update_layout(title={'text': "Box plot of cases (Month Wise)",
                         'y': 0.95,
                         'x': 0.03,
                         'xanchor': 'left',
                         'yanchor': 'top'},
                  font=dict(
                      family="Courier New, monospace",
                      size=15,
                      color="#7f7f7f"
                  ),
                  xaxis=dict(showgrid=False, zeroline=False, showticklabels=True),
                  yaxis=dict(zeroline=False, gridcolor='blue'),
                  )
fig.show()
# Donut chart of the gender split.
# value_counts() orders its result by frequency, so the labels are derived
# from the result's own index; a fixed ["Male", "Female"] list would be
# swapped whenever female cases outnumber male cases.
values = data['Sex'].value_counts()
sex_names = {'M': 'Male', 'F': 'Female'}
# Codes other than 'M'/'F' (if any) are shown unchanged.
labels = [sex_names.get(code, code) for code in values.index]
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.6)])
fig.update_layout(
    title={'text': "Distribution of Gender",
           'y': 0.9,
           'x': 0.05,
           'xanchor': 'left',
           'yanchor': 'top'},
    font=dict(family="Courier New, monospace",
              size=15,
              color="#7f7f7f")
)
fig.show()
# Distribution plot of patient ages (a single group labelled 'Age').
group_labels = ['Age']
hist_data = [data['Age'].tolist()]
fig = ff.create_distplot(hist_data, group_labels)
fig.update_layout(
    title=dict(
        text="Distribution of Age",
        y=0.9,
        x=0.05,
        xanchor='left',
        yanchor='top',
    ),
    xaxis_title="Age",
    font={
        'family': "Courier New, monospace",
        'size': 15,
        'color': "#7f7f7f",
    },
)
fig.show()
# Case counts per area, highest first, as a ['Loc', 'Cnt'] frame.
# The columns are named before any sorting: the old sort_values('Area')
# sorted by count on pandas < 2 but by area *name* on pandas >= 2, where
# value_counts() names its count column 'count'. value_counts() already
# returns the rows in descending count order.
Area = data['Area'].value_counts().rename_axis('Loc').reset_index(name='Cnt')

# Ten areas with the most cases per gender, ordered ascending so the largest
# bar ends up at the top of the horizontal chart.
# NOTE: this rebinds mCnt/fCnt, which earlier held the monthly counts.
mCnt = data[data['Sex'] == 'M']['Area'].value_counts().nlargest(10).sort_values(ascending=True)
fCnt = data[data['Sex'] == 'F']['Area'].value_counts().nlargest(10).sort_values(ascending=True)

fig = go.Figure()
fig.add_trace(go.Bar(
    y=mCnt.index,
    x=mCnt,
    name='Male',
    orientation='h',
    text=mCnt,
    marker=dict(
        color='rgba(31, 58, 147, 1)',
        line=dict(color='rgba(103, 65, 114, 1)', width=0.3)
    )
))
fig.add_trace(go.Bar(
    y=fCnt.index,
    x=fCnt,
    name='Female',
    orientation='h',
    marker=dict(
        color='rgba(241, 90, 34, 1)',
        line=dict(color='rgba(103, 65, 114, 1)', width=0.3)
    )
))
fig.update_layout(barmode='stack',
                  title={
                      'text': "Top 10 Areas with Reported Cases (Gender Wise)",
                      'y': 0.95,
                      'x': 0.03,
                      'xanchor': 'left',
                      'yanchor': 'top'},
                  font=dict(
                      family="Courier New, monospace",
                      size=15,
                      color="#7f7f7f"
                  ))
fig.show()
# age groups
# Two concentric donuts: the inner ring shows the share of cases per age
# bracket, the outer ring splits each bracket into male / female.
age_bins = [0, 20, 40, 60, 80, 100]
ageCnt = data['Age']
fageCnt = data[data['Sex'] == 'F']['Age']
mageCnt = data[data['Sex'] == 'M']['Age']
# value_counts() orders by frequency; sort_index() restores bracket order so
# that the counts line up with the fixed labels below and so that the male
# and female counts are paired bracket by bracket.
atCnt = pd.cut(ageCnt, age_bins).value_counts().sort_index()
afCnt = pd.cut(fageCnt, age_bins).value_counts().sort_index()
amCnt = pd.cut(mageCnt, age_bins).value_counts().sort_index()
# Interleave as [male, female, male, female, ...], one pair per bracket.
subgroup_size = [count for pair in zip(amCnt, afCnt) for count in pair]
lbl = ['Below 20', '20 - 40', '40 - 60', '60 - 80', 'Above 80']
agegrp = go.FigureWidget()
agegrp.add_trace(go.Pie(values=atCnt,
                        labels=lbl,
                        domain={'x': [0.2, 0.8], 'y': [0.1, 0.9]},
                        hole=0.5,
                        direction='clockwise',
                        name='Total',
                        sort=False,
                        marker={'colors': [' #ec3e40 ', ' #4c54c2 ', ' #32a033 ', ' #900C3F', ' #ffff00 ']}))
# Every outer label must be unique: the previous list repeated '<80M', which
# does not distinguish the male and female slices of the oldest bracket, and
# its comparison signs for the youngest/oldest brackets were inverted.
agegrp.add_trace(go.Pie(values=subgroup_size,
                        labels=['<20-M', '<20-F', '20<M<40', '20<F<40', '40<M<60',
                                '40<F<60', '60<M<80', '60<F<80', '>80-M', '>80-F'],
                        domain={'x': [0.1, 0.9], 'y': [0, 1]},
                        hole=0.75,
                        direction='clockwise',
                        name='Gender Wise',
                        sort=False,
                        marker={'colors': ['#ffffff', '#999966']*5},
                        showlegend=False))
agegrp.update_layout(
    title={
        'text': "Cases in each Age Group (%)",
        'y': 0.95,
        'x': 0.03,
        'xanchor': 'left',
        'yanchor': 'top'},
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f"
    ))
agegrp.show()
# race bar plot
# Collect, for each of the areas in mCnt.index (the male top-10 areas at this
# point in the script), the number of cases per year, then flatten the counts
# year by year (all areas for the first year, then all areas for the next...).
data['Year'] = data['DOR'].map(lambda x: x.strftime('%Y'))
# The animation covers the first six years present in the data.
race_years = sorted(data['Year'].unique())[:6]
qu = []
for z in mCnt.index:
    yearly = data[data['Area'] == z]['Year'].value_counts()
    # Reindexing onto the shared year list puts every area on the same axis
    # and fills years without cases with 0; previously such an area produced
    # a shorter Series, so positional lookups hit the wrong year or failed.
    qu.append([yearly.reindex(race_years, fill_value=0)])
# .iloc makes the positional lookup explicit; plain j[z] with an integer on a
# string-indexed Series relies on a deprecated pandas fallback.
alph = [
    counts.iloc[pos]
    for pos in range(len(race_years))
    for per_area in qu
    for counts in per_area
]
def duplicate(testList, n):
    """Return a list in which every element of ``testList`` appears ``n``
    times in a row, keeping the original element order."""
    repeated = []
    for item in testList:
        repeated.extend([item] * n)
    return repeated
# Long-format table for the bar race: ten area names repeated for each of the
# six years, each year repeated ten times, plus the flattened counts in alph.
df = pd.DataFrame(
    {
        "Name": [
            'BALARAMAPURAM', 'POONTHURA', 'KALLIYOOR', 'THIRUVALLAM', 'NEMOM',
            'VATTIYOORKKAVU', 'PALLICHAL', 'VIZHINJAM', 'KARAKULAM', 'CORPORATION',
        ] * 6,
        "Year": duplicate([2013, 2014, 2015, 2016, 2017, 2018], 10),
        "Number": alph,
    }
)
# mapping colors
def name_to_color(names, r_min=0, r_max=255, g_min=0, g_max=255, b_min=0, b_max=255):
    """Assign a random ``'rgb(r, g, b)'`` string to every distinct name.

    ``names`` must provide ``.unique()`` (e.g. a pandas Series). Each colour
    channel is drawn with ``random.randint`` from its inclusive
    ``[*_min, *_max]`` range. Returns a dict mapping name -> colour string.
    """
    channel_bounds = ((r_min, r_max), (g_min, g_max), (b_min, b_max))
    palette = {}
    for label in names.unique():
        # Channels are drawn in red, green, blue order for each name.
        channels = [random.randint(low, high) for low, high in channel_bounds]
        palette[label] = 'rgb({}, {}, {})'.format(*channels)
    return palette
# Pick one random colour per area (red 125-255, green and blue 0-185) and
# attach it to every row of that area.
mapping_colors = name_to_color(
    df.Name,
    r_min=125, r_max=255,
    g_min=0, g_max=185,
    b_min=0, b_max=185,
)
df['Color'] = df['Name'].map(mapping_colors)
# fn to create list of frames
def frames_animation(df, title):
    """Build the animation frames of the bar race, one per year.

    ``df`` needs the columns 'Year', 'Name', 'Number' and 'Color'. A frame is
    created for every integer year from the smallest to the largest 'Year'
    value (inclusive); its title is ``title`` followed by the year.
    Returns the list of ``go.Frame`` objects in chronological order.
    """
    first_year = df['Year'].min()
    stop_year = df['Year'].max() + 1

    def build_frame(year):
        # Bars for the rows of this year, labelled with area name and count.
        rows = df[df['Year'] == year]
        bars = go.Bar(
            x=rows['Name'],
            y=rows['Number'],
            marker_color=rows['Color'],
            hoverinfo='none',
            textposition='outside',
            texttemplate='%{x}<br>%{y}',
            cliponaxis=False,
        )
        frame_layout = go.Layout(
            font={'size': 14, 'color': "white"},
            plot_bgcolor='#111111',
            xaxis={'showline': False, 'visible': False},
            yaxis={'showline': False, 'visible': False},
            bargap=0.15,
            title=title + str(year),
        )
        return go.Frame(data=[bars], layout=frame_layout)

    return [build_frame(year) for year in range(first_year, stop_year)]
# fn to create bar race plot
def bar_race_plot(df, title, list_of_frames):
    """Assemble the animated bar race figure.

    ``df`` needs the columns 'Year', 'Name', 'Number' and 'Color'. The figure
    initially shows the bars of the earliest year, fixes the y range to
    ``(0, max Number)`` and carries ``list_of_frames`` as its animation
    frames, together with "Play" and "Stop" buttons.
    Returns the ``go.Figure``.
    """
    initial_year = df['Year'].min()
    first_rows = df[df['Year'] == initial_year]
    range_max = df['Number'].max()

    play_button = dict(
        label="Play",
        method="animate",
        args=[None, {"frame": {"duration": 2000, "redraw": True},
                     "fromcurrent": True}])
    # The first argument must be [None] (a frame list holding only None) to
    # halt a running animation. A bare None means "animate through all
    # frames", so the button used to replay the animation instead of stopping.
    stop_button = dict(
        label="Stop",
        method="animate",
        args=[[None], {"frame": {"duration": 0, "redraw": False},
                       "mode": "immediate",
                       "transition": {"duration": 0}}])

    fig = go.Figure(
        data=[go.Bar(x=first_rows.Name, y=first_rows.Number,
                     marker_color=first_rows.Color, hoverinfo='none',
                     textposition='outside', texttemplate='%{x}<br>%{y}',
                     cliponaxis=True)],
        layout=go.Layout(font={'size': 14}, plot_bgcolor=' #111111',
                         xaxis={'showline': False, 'visible': False},
                         yaxis={'showline': False, 'visible': False,
                                'range': (0, range_max)},
                         bargap=0.15, title=title + str(initial_year),
                         updatemenus=[dict(type="buttons",
                                           buttons=[play_button, stop_button])]),
        frames=list(list_of_frames))
    return fig
# Build the per-year frames, assemble the animated figure and display it.
title = 'Race Bar_plot of Cases in Top 10 Areas -- '
list_of_frames = frames_animation(df=df, title=title)
fig = bar_race_plot(df=df, title=title, list_of_frames=list_of_frames)
fig.show()